import Nzh from 'nzh/cn';
import { parseLines } from './parseLines';

/**
 * One content line emitted by `parseData`, tagged with the article it was
 * found under and its position within that article.
 */
export type DataItem = {
  /**
   * Value produced by `Nzh.decodeS` from the numeral captured in the most
   * recent title line; the empty string if no title line has been seen yet.
   */
  articleId: string;
  /**
   * 1-based position of this line among the content lines that follow the
   * most recent title line (or the start of the input if there was none).
   */
  sectionId: number;
  /** The line text exactly as returned by `parseLines`. */
  content: string;
};

/**
 * Splits `text` into lines via `parseLines` and groups the content lines
 * under the title line that precedes them.
 *
 * A title line looks like `上卷·<Chinese numeral> <title>` (or `下卷·…`),
 * optionally preceded by whitespace. Title lines are not emitted themselves;
 * they set the current `articleId` (the numeral converted with
 * `Nzh.decodeS`) and restart the section counter. Every other line is
 * emitted as a `DataItem` whose `sectionId` counts up from 1 within the
 * current article.
 *
 * @param text Raw source text.
 * @returns One item per non-title line, in input order. Lines that appear
 *   before the first title carry an empty `articleId`.
 */
export function parseData(text: string): DataItem[] {
  const zhNum = '零一二三四五六七八九十百千';
  // Deliberately NOT global/sticky: the regex is applied to a different
  // string on every iteration, and a `g` flag would carry `lastIndex` over
  // from the previous match, making the line right after any title line
  // fail the `^` anchor (so consecutive titles were misread as content).
  const titleRegexp = new RegExp(`^\\s*[上下]卷·([${zhNum}]*) (.*)`);

  // Per the original note, parseLines is expected to drop commented-out lines.
  const lines = parseLines(text);

  const data: DataItem[] = [];
  let articleId = '';
  let sectionId = 0;
  for (const line of lines) {
    const m = titleRegexp.exec(line);
    if (m) {
      // New article: remember its number and restart section numbering.
      articleId = Nzh.decodeS(m[1]);
      sectionId = 0;
    } else {
      sectionId++;
      data.push({ articleId, sectionId, content: line });
    }
  }
  return data;
}
